{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RU553Jumx05t"
   },
   "source": [
    "# RELEVAGAN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "CRWaG6qPWj1T"
   },
   "source": [
    "<a id=\"import-header\"><h1>Import Header</h1></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 4298,
     "status": "ok",
     "timestamp": 1646286467366,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "adrd84AgicLV",
    "outputId": "5efbe38a-5301-44fd-d78b-ccb5e21cd524"
   },
   "outputs": [],
   "source": [
    "# Google Colab only (disabled): mount Google Drive and switch into the project folder.\n",
    "# Leave commented out when running on a local kernel.\n",
    "# from google.colab import drive \n",
    "# drive.mount('/content/drive')\n",
    "# %cd /content/drive/MyDrive/PhD/Development/code/RELEVAGAN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1391,
     "status": "ok",
     "timestamp": 1646286468746,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "mTwKRaEVjJ8c",
    "outputId": "919b98ff-2212-4330-ea6a-1b32ba525777"
   },
   "outputs": [],
   "source": [
    "# Optional one-off dependency installation (disabled). Uncomment only on a fresh\n",
    "# environment; the versions are not pinned here.\n",
    "# !pip install tensorflow\n",
    "# !pip install gym\n",
    "# !pip install keras\n",
    "# !pip install keras-rl2\n",
    "\n",
    "# NOTE(review): duplicate of the keras-rl2 line above.\n",
    "# !pip install keras-rl2\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "executionInfo": {
     "elapsed": 24,
     "status": "ok",
     "timestamp": 1646286468748,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "ljvKEMMhp8lR"
   },
   "outputs": [],
   "source": [
    "# Google Colab only (disabled): alternative working-directory switch plus a listing.\n",
    "# NOTE(review): this path spells the folder 'My Drive' while the mount cell above uses 'MyDrive'.\n",
    "# %cd /content/drive/My Drive/PhD/Development/code/RELEVAGAN\n",
    "# !ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "executionInfo": {
     "elapsed": 4741,
     "status": "ok",
     "timestamp": 1646286473469,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "ZlXu5pxhWj1b",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/nu/anaconda3/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  from pandas import MultiIndex, Int64Index\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# `header` is a project-local module that is not part of this notebook.\n",
    "import importlib\n",
    "import header\n",
    "\n",
    "importlib.reload(header) # For reloading after making changes\n",
    "# The wildcard import is presumably where later cells get `os`, `np`, `pd` and the\n",
    "# prepare_* dataset helpers from, since none of them are imported in the cells that\n",
    "# use them - TODO confirm against header.py.\n",
    "from header import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "id": "aQkaphklFIYW"
   },
   "source": [
    "<a id=\"select-gan-and-dataset\"><h1>Select GAN and Dataset and Flags</h1></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "executionInfo": {
     "elapsed": 34,
     "status": "ok",
     "timestamp": 1646286473470,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "kPfnVUMHWj1h"
   },
   "outputs": [],
   "source": [
    "# Active GAN variant (exactly one line uncommented). The value is also used as the\n",
    "# sub-directory name under cache/ and figs/ in the 'Set Paths' cells.\n",
    "GAN_type = 'RELEVAGAN_CC'\n",
    "# GAN_type = 'ACGAN_CV'\n",
    "# GAN_type = 'EVAGAN_CV'\n",
    "\n",
    "\n",
    "# Active dataset (exactly one line uncommented). The value names the sub-directory\n",
    "# under Dataset/ and selects the loader branch in the 'Import Dataset' cell.\n",
    "# DATA_SET = 'ISCX-2014'\n",
    "# DATA_SET = 'CIC-2017'\n",
    "DATA_SET = 'CIC-2018'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4zsYaqGjWj1j"
   },
   "source": [
    "<a id=\"set-flags\"><h2>Set Flags</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "executionInfo": {
     "elapsed": 31,
     "status": "ok",
     "timestamp": 1646286473471,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "S1cf7rM3Wj1k"
   },
   "outputs": [],
   "source": [
    "# Run-control flags (1 = on, 0 = off). Only the flags whose use is visible in the\n",
    "# nearby cells are described; the rest are presumably read by later cells or by\n",
    "# helpers from `header` - TODO document them where they are consumed.\n",
    "\n",
    "# 1: preprocess the raw dataset CSV from scratch in the 'Import Dataset' cell;\n",
    "# 0: load the previously saved '..._(Preprocessed).csv' instead.\n",
    "begin_from_start = 1\n",
    "take_chunk = 0\n",
    "required_epochs = 150\n",
    "\n",
    "# 1: run the 'Display Features' cell body; 0: skip it.\n",
    "DISPLAY_FEATURES = 0\n",
    "EVALUATION_PARAMETER = 'Accuracy'\n",
    "SAVE_ONLY_BOT_DATA = 0\n",
    "USE_KMEANS_FOR_CLASSIFICATION = 1\n",
    "\n",
    "BALANCE_THE_DATASET = 1\n",
    "\n",
    "labels =[]\n",
    "\n",
    "USE_ONLY_TRAIN_SET = 1\n",
    "\n",
    "USE_ALL_CLASSIFIERS = 0\n",
    "\n",
    "ACCU_EVAL_TEST = 0\n",
    "RCL_EVAL_TEST = 0\n",
    "\n",
    "VISUAL_TEST_OVERLAPPING = 1\n",
    "\n",
    "CSV_ONE_BOT = 0\n",
    "\n",
    "VIEW_ALL_BOTS = 0\n",
    "\n",
    "CTU_NERIS = 0\n",
    "\n",
    "SINGLE_WEIGHT_CLASSIFIER_TEST_C2ST = 0\n",
    "SINGLE_WEIGHT_CLASSIFIER_TEST_PROPOSED_METHODOLOGY = 0\n",
    "\n",
    "C2ST_BLACK_BOX_TEST = 0\n",
    "BOTSHOT_BLACK_BOX_TEST = 0\n",
    "\n",
    "C2ST_BLACK_BOX_TEST_AFTER_GAN_TRAINING = 0\n",
    "BOTSHOT_BLACK_BOX_TEST_AFTER_GAN_TRAINING = 0\n",
    "\n",
    "GENERATE_OTHERS_DATA = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gbFYC_WQWj1n"
   },
   "source": [
    "<a id=\"set-paths\"><h1>Set Paths</h1></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "executionInfo": {
     "elapsed": 30,
     "status": "ok",
     "timestamp": 1646286473472,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "8Al8rt2iWj1p"
   },
   "outputs": [],
   "source": [
    "# Project root = the kernel's working directory at the moment this cell runs.\n",
    "# Run it before the `%cd $DATA_SET_PATH` cell further down: re-running it after that\n",
    "# cell would capture the dataset folder instead of the project root.\n",
    "MAIN_CODE_PATH = os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "executionInfo": {
     "elapsed": 27,
     "status": "ok",
     "timestamp": 1646286473472,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "WrizUpKsYxel"
   },
   "outputs": [],
   "source": [
    "# Per-run directories derived from the project root; each keeps its trailing '/'\n",
    "# so that a file name can be appended directly.\n",
    "DATA_SET_PATH = f'{MAIN_CODE_PATH}/Dataset/{DATA_SET}/'\n",
    "CACHE_PATH = f'{MAIN_CODE_PATH}/cache/{GAN_type}/'\n",
    "FIGS_PATH = f'{MAIN_CODE_PATH}/figs/{GAN_type}/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 27,
     "status": "ok",
     "timestamp": 1646286473473,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "yT1YS8U9Wj1t",
    "outputId": "e4079158-267c-4b8c-f6c5-241c2570f61b",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/nu/Insync/rhr407@gmail.com/Google Drive/PhD/Development/code/RELEVAGAN_DQN_Agent/Dataset/CIC-2018/\n",
      "/home/nu/Insync/rhr407@gmail.com/Google Drive/PhD/Development/code/RELEVAGAN_DQN_Agent/cache/RELEVAGAN_CC/\n",
      "/home/nu/Insync/rhr407@gmail.com/Google Drive/PhD/Development/code/RELEVAGAN_DQN_Agent/figs/RELEVAGAN_CC/\n"
     ]
    }
   ],
   "source": [
    "# Echo the resolved directories, one per line, as a quick sanity check.\n",
    "print(DATA_SET_PATH, CACHE_PATH, FIGS_PATH, sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "IwGuWbiSWj1y"
   },
   "source": [
    "<a id=\"GPU Settings\"><h2>Check Available GPUs</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 24,
     "status": "ok",
     "timestamp": 1646286473474,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "jwQXSblnWj10",
    "outputId": "6dc71bbf-2f29-444d-83ae-66148060c55e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num GPUs Available:  3\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "# Report how many physical GPU devices TensorFlow can see on this machine.\n",
    "print(\n",
    "    \"Num GPUs Available: \",\n",
    "    len(tf.config.experimental.list_physical_devices(device_type='GPU')),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ZzSPR7q5Wj11"
   },
   "source": [
    "<a id=\"import-dataset\"><h2>Import Dataset</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 21,
     "status": "ok",
     "timestamp": 1646286473475,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "w0MQAWmiWj12",
    "outputId": "82533b26-0223-4159-9f62-b488faf6f8c0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/nu/Insync/rhr407@gmail.com/Google Drive/PhD/Development/code/RELEVAGAN_DQN_Agent/Dataset/CIC-2018\n",
      " Friday-02-03-2018_TrafficForML_CICFlowMeter.csv\r\n",
      "'Friday-02-03-2018_TrafficForML_CICFlowMeter.csv_(Preprocessed).csv'\r\n"
     ]
    }
   ],
   "source": [
    "# IPython expands $DATA_SET_PATH to the Python variable of that name.\n",
    "# This changes the kernel's working directory for every later cell, so anything that\n",
    "# uses a relative path afterwards resolves against the dataset folder.\n",
    "%cd $DATA_SET_PATH\n",
    "# List the dataset folder to confirm the raw and preprocessed CSV files are present.\n",
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 5361,
     "status": "ok",
     "timestamp": 1646286478822,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "NIJpUfZdWj15",
    "outputId": "f289caa0-4318-4fca-f55a-3a16e3bd78e8",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing File: Friday-02-03-2018_TrafficForML_CICFlowMeter.csv DATA shape: (1048575, 80)\n",
      "The shape of dataset is:  (1048575, 80)\n",
      "Before Preprocesing: Total: (1048575, 80)\n",
      "Before Preprocesing: Normal: (762384, 80)\n",
      "Before Preprocesing: Bots: (286191, 80)\n",
      "Data Shape before droping NULL and INF values: \n",
      "(1048575, 80)\n",
      "Data Shape after droping NULL and INF values: \n",
      "(1044525, 80)\n",
      "data_df after removing categorical features\n",
      "(1044525, 77)\n",
      "data_df after removing Label column\n",
      "(1044525, 76)\n",
      " Data Columns after converting to Float\n",
      "       Flow Duration  Tot Fwd Pkts  Tot Bwd Pkts  TotLen Fwd Pkts  \\\n",
      "count   1.044525e+06  1.044525e+06  1.044525e+06     1.044525e+06   \n",
      "mean    1.155885e+07  5.902530e+00  7.010040e+00     3.935511e+02   \n",
      "std     3.017092e+07  8.920527e+01  2.133700e+02     2.163833e+03   \n",
      "min     1.000000e+00  1.000000e+00  0.000000e+00     0.000000e+00   \n",
      "25%     5.190000e+02  2.000000e+00  0.000000e+00     0.000000e+00   \n",
      "50%     1.127400e+04  3.000000e+00  1.000000e+00     6.400000e+01   \n",
      "75%     2.221014e+06  7.000000e+00  5.000000e+00     3.690000e+02   \n",
      "max     1.200000e+08  4.315900e+04  6.924100e+04     1.100627e+06   \n",
      "\n",
      "       TotLen Bwd Pkts  Fwd Pkt Len Max  Fwd Pkt Len Min  Fwd Pkt Len Mean  \\\n",
      "count     1.044525e+06     1.044525e+06     1.044525e+06      1.044525e+06   \n",
      "mean      5.291713e+03     2.111953e+02     8.273807e+00      5.479855e+01   \n",
      "std       3.094749e+05     2.671786e+02     2.098046e+01      5.765815e+01   \n",
      "min       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "50%       1.290000e+02     4.600000e+01     0.000000e+00      4.000000e+01   \n",
      "75%       5.820000e+02     3.260000e+02     0.000000e+00      1.086667e+02   \n",
      "max       1.010000e+08     1.711000e+03     1.460000e+03      1.460000e+03   \n",
      "\n",
      "       Fwd Pkt Len Std  Bwd Pkt Len Max  ...  Fwd Act Data Pkts  \\\n",
      "count     1.044525e+06     1.044525e+06  ...       1.044525e+06   \n",
      "mean      7.823176e+01     3.484667e+02  ...       2.052119e+00   \n",
      "std       9.701599e+01     5.042396e+02  ...       1.395102e+01   \n",
      "min       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "50%       0.000000e+00     1.120000e+02  ...       1.000000e+00   \n",
      "75%       1.882162e+02     4.880000e+02  ...       3.000000e+00   \n",
      "max       1.032376e+03     1.460000e+03  ...       9.262000e+03   \n",
      "\n",
      "       Fwd Seg Size Min   Active Mean    Active Std    Active Max  \\\n",
      "count      1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06   \n",
      "mean       1.744416e+01  1.234346e+05  6.154744e+04  1.912618e+05   \n",
      "std        5.187126e+00  2.087119e+06  1.264132e+06  2.766778e+06   \n",
      "min        0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "25%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "50%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "75%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "max        4.400000e+01  1.110000e+08  7.490000e+07  1.110000e+08   \n",
      "\n",
      "         Active Min     Idle Mean      Idle Std      Idle Max      Idle Min  \n",
      "count  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  \n",
      "mean   8.337649e+04  4.799736e+06  1.068351e+05  4.882636e+06  4.695151e+06  \n",
      "std    1.763232e+06  1.744425e+07  1.465423e+06  1.759949e+07  1.736415e+07  \n",
      "min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "25%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "50%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "75%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "max    1.110000e+08  1.200000e+08  7.590000e+07  1.200000e+08  1.200000e+08  \n",
      "\n",
      "[8 rows x 76 columns]\n",
      "INF values before removing: Int64Index([], dtype='int64')\n",
      "INF values removed and Data reindexed\n",
      "Data before removing std = 0 columns\n",
      "(1044525, 76)\n",
      "(1044525, 77)\n",
      "Data after removing std = 0 columns\n",
      "(1044525, 67)\n",
      "       Flow Duration  Tot Fwd Pkts  Tot Bwd Pkts  TotLen Fwd Pkts  \\\n",
      "count   1.044525e+06  1.044525e+06  1.044525e+06     1.044525e+06   \n",
      "mean    1.155885e+07  5.902530e+00  7.010040e+00     3.935511e+02   \n",
      "std     3.017092e+07  8.920527e+01  2.133700e+02     2.163833e+03   \n",
      "min     1.000000e+00  1.000000e+00  0.000000e+00     0.000000e+00   \n",
      "25%     5.190000e+02  2.000000e+00  0.000000e+00     0.000000e+00   \n",
      "50%     1.127400e+04  3.000000e+00  1.000000e+00     6.400000e+01   \n",
      "75%     2.221014e+06  7.000000e+00  5.000000e+00     3.690000e+02   \n",
      "max     1.200000e+08  4.315900e+04  6.924100e+04     1.100627e+06   \n",
      "\n",
      "       TotLen Bwd Pkts  Fwd Pkt Len Max  Fwd Pkt Len Min  Fwd Pkt Len Mean  \\\n",
      "count     1.044525e+06     1.044525e+06     1.044525e+06      1.044525e+06   \n",
      "mean      5.291713e+03     2.111953e+02     8.273807e+00      5.479855e+01   \n",
      "std       3.094749e+05     2.671786e+02     2.098046e+01      5.765815e+01   \n",
      "min       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "50%       1.290000e+02     4.600000e+01     0.000000e+00      4.000000e+01   \n",
      "75%       5.820000e+02     3.260000e+02     0.000000e+00      1.086667e+02   \n",
      "max       1.010000e+08     1.711000e+03     1.460000e+03      1.460000e+03   \n",
      "\n",
      "       Fwd Pkt Len Std  Bwd Pkt Len Max  ...  Fwd Act Data Pkts  \\\n",
      "count     1.044525e+06     1.044525e+06  ...       1.044525e+06   \n",
      "mean      7.823176e+01     3.484667e+02  ...       2.052119e+00   \n",
      "std       9.701599e+01     5.042396e+02  ...       1.395102e+01   \n",
      "min       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "50%       0.000000e+00     1.120000e+02  ...       1.000000e+00   \n",
      "75%       1.882162e+02     4.880000e+02  ...       3.000000e+00   \n",
      "max       1.032376e+03     1.460000e+03  ...       9.262000e+03   \n",
      "\n",
      "       Fwd Seg Size Min   Active Mean    Active Std    Active Max  \\\n",
      "count      1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06   \n",
      "mean       1.744416e+01  1.234346e+05  6.154744e+04  1.912618e+05   \n",
      "std        5.187126e+00  2.087119e+06  1.264132e+06  2.766778e+06   \n",
      "min        0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "25%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "50%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "75%        2.000000e+01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "max        4.400000e+01  1.110000e+08  7.490000e+07  1.110000e+08   \n",
      "\n",
      "         Active Min     Idle Mean      Idle Std      Idle Max      Idle Min  \n",
      "count  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  \n",
      "mean   8.337649e+04  4.799736e+06  1.068351e+05  4.882636e+06  4.695151e+06  \n",
      "std    1.763232e+06  1.744425e+07  1.465423e+06  1.759949e+07  1.736415e+07  \n",
      "min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "25%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "50%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "75%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "max    1.110000e+08  1.200000e+08  7.590000e+07  1.200000e+08  1.200000e+08  \n",
      "\n",
      "[8 rows x 66 columns]\n",
      "HEREEEEEEEEEEEEEEEEEEEEEEEEEEE+++++++++++++++++++++++++++++++>>>>>>>>>>>>>>>>>>>>>>\n",
      "       Flow Duration  Tot Fwd Pkts  Tot Bwd Pkts  TotLen Fwd Pkts  \\\n",
      "count   1.044525e+06  1.044525e+06  1.044525e+06     1.044525e+06   \n",
      "mean    9.632377e-02  1.135949e-04  1.012412e-04     3.575699e-04   \n",
      "std     2.514243e-01  2.066946e-03  3.081555e-03     1.966000e-03   \n",
      "min     0.000000e+00  0.000000e+00  0.000000e+00     0.000000e+00   \n",
      "25%     4.316667e-06  2.317068e-05  0.000000e+00     0.000000e+00   \n",
      "50%     9.394167e-05  4.634135e-05  1.444231e-05     5.814867e-05   \n",
      "75%     1.850844e-02  1.390241e-04  7.221155e-05     3.352634e-04   \n",
      "max     1.000000e+00  1.000000e+00  1.000000e+00     1.000000e+00   \n",
      "\n",
      "       TotLen Bwd Pkts  Fwd Pkt Len Max  Fwd Pkt Len Min  Fwd Pkt Len Mean  \\\n",
      "count     1.044525e+06     1.044525e+06     1.044525e+06      1.044525e+06   \n",
      "mean      5.239320e-05     1.234339e-01     5.666991e-03      3.753325e-02   \n",
      "std       3.064108e-03     1.561535e-01     1.437018e-02      3.949188e-02   \n",
      "min       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00     0.000000e+00      0.000000e+00   \n",
      "50%       1.277228e-06     2.688486e-02     0.000000e+00      2.739726e-02   \n",
      "75%       5.762376e-06     1.905319e-01     0.000000e+00      7.442922e-02   \n",
      "max       1.000000e+00     1.000000e+00     1.000000e+00      1.000000e+00   \n",
      "\n",
      "       Fwd Pkt Len Std  Bwd Pkt Len Max  ...  Fwd Act Data Pkts  \\\n",
      "count     1.044525e+06     1.044525e+06  ...       1.044525e+06   \n",
      "mean      7.577837e-02     2.386758e-01  ...       2.215633e-04   \n",
      "std       9.397352e-02     3.453696e-01  ...       1.506265e-03   \n",
      "min       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "25%       0.000000e+00     0.000000e+00  ...       0.000000e+00   \n",
      "50%       0.000000e+00     7.671233e-02  ...       1.079680e-04   \n",
      "75%       1.823136e-01     3.342466e-01  ...       3.239041e-04   \n",
      "max       1.000000e+00     1.000000e+00  ...       1.000000e+00   \n",
      "\n",
      "       Fwd Seg Size Min   Active Mean    Active Std    Active Max  \\\n",
      "count      1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06   \n",
      "mean       3.964582e-01  1.112023e-03  8.217282e-04  1.723079e-03   \n",
      "std        1.178892e-01  1.880288e-02  1.687760e-02  2.492593e-02   \n",
      "min        0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "25%        4.545455e-01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "50%        4.545455e-01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "75%        4.545455e-01  0.000000e+00  0.000000e+00  0.000000e+00   \n",
      "max        1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00   \n",
      "\n",
      "         Active Min     Idle Mean      Idle Std      Idle Max      Idle Min  \n",
      "count  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  1.044525e+06  \n",
      "mean   7.511395e-04  3.999780e-02  1.407577e-03  4.068863e-02  3.912626e-02  \n",
      "std    1.588498e-02  1.453688e-01  1.930729e-02  1.466624e-01  1.447013e-01  \n",
      "min    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "25%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "50%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "75%    0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00  \n",
      "max    1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00  \n",
      "\n",
      "[8 rows x 66 columns]\n",
      "INF values: 726633\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Any Left over INF values: Int64Index([], dtype='int64')\n",
      " Data Columns after removing FlowID: Index(['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts',\n",
      "       'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min',\n",
      "       'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max',\n",
      "       'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s',\n",
      "       'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max',\n",
      "       'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std',\n",
      "       'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean',\n",
      "       'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags',\n",
      "       'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s',\n",
      "       'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std',\n",
      "       'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt',\n",
      "       'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'ECE Flag Cnt',\n",
      "       'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg',\n",
      "       'Subflow Fwd Pkts', 'Subflow Fwd Byts', 'Subflow Bwd Pkts',\n",
      "       'Subflow Bwd Byts', 'Init Fwd Win Byts', 'Init Bwd Win Byts',\n",
      "       'Fwd Act Data Pkts', 'Fwd Seg Size Min', 'Active Mean', 'Active Std',\n",
      "       'Active Max', 'Active Min', 'Idle Mean', 'Idle Std', 'Idle Max',\n",
      "       'Idle Min', 'Label'],\n",
      "      dtype='object')\n",
      "Data Shape before droping NULL and INF values: \n",
      "(1044525, 67)\n",
      "Data Shape before droping NULL and INF values: \n",
      "(533975, 67)\n",
      "After Preprocesing: Bots: (143014, 67)\n",
      "After Preprocesing: Total: (393521, 67)\n",
      "After Preprocesing: Normal: (390961, 67)\n",
      "After Preprocesing: Bots Chunk: (2560, 67)\n",
      "File: Friday-02-03-2018_TrafficForML_CICFlowMeter.csv_(Preprocessed).csv Saving ...\n",
      "File: Friday-02-03-2018_TrafficForML_CICFlowMeter.csv_(Preprocessed).csv saved to directory\n",
      "Dataset preprocessed: CIC-2018\n"
     ]
    }
   ],
   "source": [
    "# Load `training_data` for the dataset chosen in DATA_SET.\n",
    "#   begin_from_start truthy -> preprocess the raw flow CSV with the matching prepare_* helper.\n",
    "#   begin_from_start falsy  -> read the '<name>_(Preprocessed).csv' saved by an earlier run.\n",
    "# An unsupported DATA_SET raises ValueError here instead of surfacing later as a NameError.\n",
    "# 'UNSW_BotIoT' and 'BoT-IoT' are accepted in both modes because the two branches of this\n",
    "# cell previously spelled that dataset differently.\n",
    "if begin_from_start:\n",
    "\n",
    "    # The prepare_* helpers are not defined in this notebook (presumably from `header`).\n",
    "    if DATA_SET == 'ISCX-2014':\n",
    "        training_data = prepare_ISCX_2014_data(PATH = DATA_SET_PATH, INPUT_FILE_NAME = r'ISCX_Botnet-Training.pcap_Flow.csv')\n",
    "\n",
    "    elif DATA_SET == 'CIC-2017':\n",
    "        training_data = prepare_cic_2017_data(PATH = DATA_SET_PATH, INPUT_FILE_NAME = r'CIC_Friday_bot.csv')\n",
    "\n",
    "    elif DATA_SET == 'CIC-2018':\n",
    "        training_data = prepare_cic_2018_data(PATH = DATA_SET_PATH, INPUT_FILE_NAME = r'Friday-02-03-2018_TrafficForML_CICFlowMeter.csv')\n",
    "\n",
    "    elif DATA_SET in ('UNSW_BotIoT', 'BoT-IoT'):\n",
    "        training_data = prepare_UNSW_IoT(PATH = DATA_SET_PATH, INPUT_FILE_NAME = r'UNSW_2018_IoT_Botnet_Final_10_best_Training.csv')\n",
    "\n",
    "    elif DATA_SET == 'Darknet':\n",
    "        training_data = prepare_DARKNET_2020_data(PATH = DATA_SET_PATH, INPUT_FILE_NAME = r'Darknet.csv')\n",
    "\n",
    "    else:\n",
    "        # Fail before the success message below can be printed for a dataset that was never loaded.\n",
    "        raise ValueError('No preprocessing routine for DATA_SET: ' + repr(DATA_SET))\n",
    "\n",
    "    print('Dataset preprocessed: ' + DATA_SET)\n",
    "\n",
    "else:\n",
    "\n",
    "    # Dataset name -> file-name stem of its cached, preprocessed CSV.\n",
    "    PREPROCESSED_FILE_STEMS = {\n",
    "        'ISCX-2014': r'ISCX_Botnet-Training.pcap_Flow.csv_VIRUT',\n",
    "        'CIC-2017': r'CIC_Friday_bot.csv',\n",
    "        'CIC-2018': r'Friday-02-03-2018_TrafficForML_CICFlowMeter.csv',\n",
    "        'BoT-IoT': r'UNSW_2018_IoT_Botnet_Final_10_best_Training.csv',\n",
    "        'UNSW_BotIoT': r'UNSW_2018_IoT_Botnet_Final_10_best_Training.csv',\n",
    "        'Drebin': r'Drebin_API_Dataset.csv',\n",
    "        'Darknet': r'Darknet.csv',\n",
    "    }\n",
    "    if DATA_SET not in PREPROCESSED_FILE_STEMS:\n",
    "        raise ValueError('No preprocessed file known for DATA_SET: ' + repr(DATA_SET))\n",
    "    INPUT_TRAINING_FILE_NAME = PREPROCESSED_FILE_STEMS[DATA_SET]\n",
    "\n",
    "    # Read via DATA_SET_PATH so the load does not depend on the current working directory.\n",
    "    training_data = pd.read_csv(DATA_SET_PATH + INPUT_TRAINING_FILE_NAME + '_(Preprocessed).csv', low_memory=False)\n",
    "    # Drop the 'Unnamed: 0' column read back from the cached CSV.\n",
    "    training_data = training_data.drop(['Unnamed: 0'], axis=1)\n",
    "\n",
    "    print('Dataset Imported: ' + DATA_SET)\n",
    "    print('Training set: '+ str(training_data.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1432,
     "status": "ok",
     "timestamp": 1646286480246,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "eg83hsWRWj16",
    "outputId": "b66f50cf-7738-4ab5-8c7b-e1dcaa61ddff"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       Flow Duration   Tot Fwd Pkts   Tot Bwd Pkts  TotLen Fwd Pkts  \\\n",
      "count  393521.000000  393521.000000  393521.000000    393521.000000   \n",
      "mean        0.145840       0.000249       0.000236         0.000808   \n",
      "std         0.304724       0.003328       0.004991         0.003033   \n",
      "min         0.000000       0.000000       0.000014         0.000000   \n",
      "25%         0.002439       0.000070       0.000058         0.000146   \n",
      "50%         0.018163       0.000162       0.000101         0.001025   \n",
      "75%         0.043428       0.000232       0.000116         0.001043   \n",
      "max         1.000000       1.000000       1.000000         1.000000   \n",
      "\n",
      "       TotLen Bwd Pkts  Fwd Pkt Len Max  Fwd Pkt Len Min  Fwd Pkt Len Mean  \\\n",
      "count    393521.000000    393521.000000    393521.000000     393521.000000   \n",
      "mean          0.000136         0.246916         0.000245          0.058060   \n",
      "std           0.004974         0.176115         0.009158          0.044768   \n",
      "min           0.000000         0.000000         0.000000          0.000000   \n",
      "25%           0.000002         0.060199         0.000000          0.022211   \n",
      "50%           0.000016         0.302162         0.000000          0.065525   \n",
      "75%           0.000017         0.395675         0.000000          0.096575   \n",
      "max           1.000000         1.000000         1.000000          1.000000   \n",
      "\n",
      "       Fwd Pkt Len Std  Bwd Pkt Len Max  ...  Fwd Seg Size Min    Active Mean  \\\n",
      "count    393521.000000    393521.000000  ...     393521.000000  393521.000000   \n",
      "mean          0.136356         0.568748  ...          0.455605       0.001048   \n",
      "std           0.092012         0.371911  ...          0.016071       0.010050   \n",
      "min           0.000000         0.000000  ...          0.454545       0.000000   \n",
      "25%           0.047674         0.158219  ...          0.454545       0.000000   \n",
      "50%           0.180339         0.803425  ...          0.454545       0.000000   \n",
      "75%           0.215642         0.803425  ...          0.454545       0.000000   \n",
      "max           1.000000         1.000000  ...          0.909091       1.000000   \n",
      "\n",
      "          Active Std     Active Max     Active Min      Idle Mean  \\\n",
      "count  393521.000000  393521.000000  393521.000000  393521.000000   \n",
      "mean        0.000724       0.001876       0.000780       0.035013   \n",
      "std         0.005257       0.012166       0.009669       0.108047   \n",
      "min         0.000000       0.000000       0.000000       0.000000   \n",
      "25%         0.000000       0.000000       0.000000       0.000000   \n",
      "50%         0.000000       0.000000       0.000000       0.000000   \n",
      "75%         0.000000       0.000000       0.000000       0.000000   \n",
      "max         1.000000       1.000000       1.000000       1.000000   \n",
      "\n",
      "            Idle Std       Idle Max       Idle Min          Label  \n",
      "count  393521.000000  393521.000000  393521.000000  393521.000000  \n",
      "mean        0.002734       0.036246       0.033240       0.993495  \n",
      "std         0.027345       0.111370       0.106079       0.080393  \n",
      "min         0.000000       0.000000       0.000000       0.000000  \n",
      "25%         0.000000       0.000000       0.000000       1.000000  \n",
      "50%         0.000000       0.000000       0.000000       1.000000  \n",
      "75%         0.000000       0.000000       0.000000       1.000000  \n",
      "max         0.890646       1.000000       1.000000       1.000000  \n",
      "\n",
      "[8 rows x 67 columns]\n"
     ]
    }
   ],
   "source": [
    "training_data = training_data.replace([np.inf, -np.inf], np.nan).dropna(how=\"any\").reset_index(drop=True)\n",
    "print(training_data.describe())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "h5phnoDzWj17"
   },
   "source": [
    "<a id=\"GPU Settings\"><h2>Display Features</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "executionInfo": {
     "elapsed": 39,
     "status": "ok",
     "timestamp": 1646286480247,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "aePYFbaoWj17",
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "if DISPLAY_FEATURES: \n",
    "    unified_df = training_data.copy()\n",
    "    X_cols = unified_df.columns[:-1]\n",
    "    y_cols = unified_df.columns[-1]\n",
    "\n",
    "\n",
    "\n",
    "    axarr = [[]]*len(X_cols)\n",
    "    columns = 4\n",
    "    rows = int( np.ceil( len(X_cols) / columns ) )\n",
    "    f, fig = plt.subplots( figsize=(columns*2.5, rows*2) )\n",
    "\n",
    "    f.suptitle('Data Distributions by Feature and Label', size=16)\n",
    "\n",
    "    for i, col in enumerate(X_cols[:]):\n",
    "        axarr[i] = plt.subplot2grid( (int(rows), int(columns)), (int(i//columns), int(i%columns)) )\n",
    "\n",
    "\n",
    "        axarr[i].hist( unified_df.loc[ unified_df.Label == 0, col ] , label=['Normal'], color=('#009933'), alpha=0.5,\n",
    "                              bins=np.linspace( np.percentile(unified_df[col],0), np.percentile(unified_df[col],100),50 ),\n",
    "                              density=True )\n",
    "\n",
    "        axarr[i].hist( unified_df.loc[ unified_df.Label == 1, col ] , label=['Real Bot'], color=['#FF0000'], alpha=0.5,\n",
    "                              bins=np.linspace( np.percentile(unified_df[col],0), np.percentile(unified_df[col],100),50 ),\n",
    "                              density=True )\n",
    "\n",
    "        axarr[i].set_xlabel(col, size=12)\n",
    "    #     axarr[i].set_ylim([0,1])\n",
    "        axarr[i].tick_params(axis='both', labelsize=10)\n",
    "        if i == 0: \n",
    "            legend = axarr[i].legend()\n",
    "            legend.get_frame().set_facecolor('white')\n",
    "        if i%4 != 0 : \n",
    "            axarr[i].tick_params(axis='y', left=True, labelleft=True)\n",
    "        else:\n",
    "            axarr[i].set_ylabel('Fraction',size=12)\n",
    "\n",
    "    plt.tight_layout(rect=[0,0,1,0.95]) # xmin, ymin, xmax, ymax\n",
    "    # plt.savefig('plots/Engineered_Data_Distributions.png')\n",
    "\n",
    "    plt.show()\n",
    "    \n",
    "# else: \n",
    "#     print('Pair Plotting..')\n",
    "# #     sns.pairplot(training_data, hue=\"Label\")\n",
    "    \n",
    "#     sns.pairplot(training_data, vars=['Flow Duration', 'Total Fwd Packet', 'Total Bwd packets',\n",
    "#        'Total Length of Fwd Packet', 'Total Length of Bwd Packet'], hue=\"Label\")\n",
    "    \n",
    "#     sns.pairplot(penguins, hue=\"species\", markers=[\"o\", \"s\", \"D\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pD-Hq093Wj18"
   },
   "source": [
    "<a id=\"GPU Settings\"><h2>Select Botnet</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 36,
     "status": "ok",
     "timestamp": 1646286480248,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "jMdckAGdWj19",
    "outputId": "46fe73e9-a31e-4ad2-ad93-1365278c0e6f",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Normal before chunk: (390961, 67)\n",
      "Real Bots before chunk: (2560, 67)\n",
      "Normal: (390961, 67)\n",
      "Real Bots: (2560, 67)\n"
     ]
    }
   ],
   "source": [
    "normal = training_data.loc[ training_data['Label']==1 ].copy()\n",
    "bots = training_data.loc[ training_data['Label']==0 ].copy()\n",
    "\n",
    "print('Normal before chunk: ' + str(normal.shape))    \n",
    "print('Real Bots before chunk: ' + str(bots.shape)) \n",
    "\n",
    "if take_chunk:\n",
    "    bots = bots[0:512]\n",
    "    \n",
    "print('Normal: ' + str(normal.shape))    \n",
    "print('Real Bots: ' + str(bots.shape)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "executionInfo": {
     "elapsed": 23,
     "status": "ok",
     "timestamp": 1646286480249,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "WEv9Tz5AWj1-"
   },
   "outputs": [],
   "source": [
    "Train = training_data.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 22,
     "status": "ok",
     "timestamp": 1646286480250,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "8TrvNDY3Wj1-",
    "outputId": "fe3d432c-995f-469a-e6e6-b26bacf6ccb7",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    2560\n",
      "Name: count, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "bots_count =  pd.DataFrame( [ [np.sum(bots['Label']==i)] for i in np.unique(bots['Label']) ], columns=['count'], index=np.unique(bots['Label']) )\n",
    "\n",
    "label_cols = [ i for i in bots.columns if 'Label' in i ]\n",
    "data_cols = [ i for i in bots.columns if i not in label_cols ]\n",
    "\n",
    "train_no_label = bots[ data_cols ].reset_index(drop=True)\n",
    "\n",
    "print(bots_count['count'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "executionInfo": {
     "elapsed": 766,
     "status": "ok",
     "timestamp": 1646286481004,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "uKyFTGYfWj1_"
   },
   "outputs": [],
   "source": [
    "train_data = bots"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "K_5UjRLxWj2B"
   },
   "source": [
    "<a id=\"Classification\"><h1>Classification</h1></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 221
    },
    "executionInfo": {
     "elapsed": 60,
     "status": "ok",
     "timestamp": 1646286481006,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "hPMsTK_pWj2B",
    "outputId": "69298b35-df4a-4b68-9100-369f528e2fdb",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2560\n",
      "CPU times: user 4.87 s, sys: 286 ms, total: 5.15 s\n",
      "Wall time: 490 ms\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOAAAACYCAYAAAD9XOVNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAe+ElEQVR4nO3deVxU5f7A8c+cGYYBQURAREXEyi1LXIrUa0KSgsvNPdGrlWaZ3sytLM1yScuStPyV996uertJl7SMzHDDJXOJFJckJDBzAdlhZB+GmfP7g5ycYBxQYGh43q+Xr5c85znnfOcwX57nnPOc5yhkWZYRBMEmJFsHIAhNmUhAQbAhkYCCYEMiAQXBhkQCCoINiQQUBBsSCSgINiQSsJF55JFH2Lhxo1nZ4cOH6dGjBzExMWzfvp3OnTuzevXqatdfvHgxnTt3Ji4uriHCFe6QSMBG7sSJE7zwwgssW7aMoUOHAuDl5cWOHTuoqKgwq1tWVkZsbCwtWrSwQaTC7RAJ2Ij9+OOPzJgxg0WLFjFy5EhTub+/P82aNePw4cNm9WNjY+nWrRvNmjUzK//0008ZMmQIPXr0YMiQIcTGxpqWlZeXs2rVKgYOHEjPnj0ZOnQou3fvNi1fv349U6dOZevWrQQHB9OrVy9mz55NcXExAHl5ecyePZvAwEB69uzJmDFj+P777+vhaNgnkYCN1M8//8z06dOZN28e48aNq7J8+PDhfPnll2ZlX375JcOHDzcri4yMZMOGDaxdu5ZTp06xaNEi5s6dy7lz5wDYtGkT+/bt47PPPiM+Pp6nnnqKBQsWkJ6ebtpGYmIily5dMnWBv/vuO7Zv3w7A2rVrKS4uZv/+/Zw4cYKxY8fy0ksvVWmdheqJBGyELl26xNSpU/H29mbChAnV1hk1ahSHDh0iLy8PgMzMTM6cOUNoaKhZvaioKKZMmUK3bt1QKpUMHDiQoKAgU/JOmzaNr776itatWyNJEo899hh6vZ6kpCTTNnQ6HXPmzMHJyYkOHTpw3333ceHCBQAKCgpQqVSo1WpUKhXh4eEcPnwYlUpVH4fG7jSaBExOTiYkJIQtW7bUet38/HwCAwOZPHkykydPZvbs2fUQYcP5+uuvmTFjBtevX+fNN9+sto6vry89e/bk66+/BmDHjh2EhIRU6X5eunSJ9957j/vuu8/07+DBg1y7dg2oTKBly5bRr18/unfvTu/evYHKpLvBx8cHtVpt+tnJycm0/JlnniEpKYkBAwYwb948du7cKVq/WmgUf6ZKSkpYsWIFffv2ve31e/fuzYcffljHkdnGzJkzmTx5Mj179mTSpEn4+/szadKkKvXGjBnD5s2beeKJJ4iOjua1116rUsfR0ZFXXnmFMWPGVLuvOXPmoNPpiIqKwtfXF6PRSLdu3czqSJLlv9P33nsvsbGxfP/99xw+fJiVK1eyZcsWIiMjUSqVtfzkTU+jaAHVajUfffQRrVq1MpVduHCBKVOm8MQTTzBz5kwKCgosrn/jgoC9uPHF7d69O6tWrWLVqlUcOXKkSr0hQ4aQmprKrl27KC0t5cEHH6xSx8/Pj/Pnz5uVXbt2DYPBAMCZM2cYP3487du3R6FQcPbs2VrFeuP3MmDAABYvXszWrVs5ffq0WRdWsKxRJKBKpUKj0ZiVrVixguXLl/Pxxx/Tv39/IiMjLa5fUlLCxYsXee6555gwYQIxMTH1HXKDGTZsGFOnTmXOnDmm864bNBoNQ4cO5Z133mHUqFEoFIoq60+aNIkvvviCo0ePUlFRwenTpxk9ejQHDx4EKruyZ8+eRa/Xk5CQwMaNG3F1dSUzM7NG8Y0fP55169ZRUlKC0Wjk7NmzODo60qZNmzv/8E1Ao+iCVufHH39kyZIlQOWl8vvuu4+ffvqJZcuWmdUbNGgQI0eOZNasWQwbNoz8/HzGjx9Pnz59zFrUP7O5c+eSkpLCs88+S3h4uNmysWPHsnXr
VrPbFDcbNWoU2dnZLF68mLy8PHx8fHjhhRcICQkBYOnSpSxZsoQHHniAbt26sXLlSqKiooiIiMDZ2dlqbO+99x5vvPEGf/nLX4DKWyTr16/H3d39zj50E6FoTE/E3/jF/e1vf6Nfv34cPXq02r/q1rzwwguEh4fz0EMP1UOUglB3GkUXtDpdunQx3Wj+5ptvOH78uMW63333HWvWrAEqu6NJSUn4+/s3SJyCcCcaRQuYkJDA6tWrSUtLQ6VS4e3tzZw5c4iIiECSJBwdHYmIiLA4xKq8vJxXXnmFtLQ0KioqCA8Pt3jVTxAak0aRgILQVDXaLqggNAU2vQpaVlZGQkICXl5e4qatYJcMBgPZ2dl07969yq02sHECJiQkVDvCQxDsTWRkJH369KlSbtME9PLyAiqDa926tdmy9IXPQnZ6das1DpKE0qcdLZ6ajbpjp1tWlUuKyX1/JRVXLyKXlqD09MY5OAyXsNFU5GWT/39vUXHtCuj1KFt54/rXcJz6BiHLMgWR/6Ls9PcY83NBksBoBKUSkMFgvOV+FS1a0nzkRJwfHkzhN9so/XYvhpxMUCjg5lN/pRKUKij/ffynwqU5KBTIhddRaJxQ+frTLCiMwq8/w5B1DZRKVD7tcH92ASof31ofvpLvYin6ZhuG7AwUag3KNu3wmP0qktud3T8s/Op/lB7ZjyE3C4VzMxw63E3Lvy9GUU3rcyeKD+2hePcXGLIzUTg6omrjR8vnFyO5tTCrl5GRwaRJk0zf9T+yaQLe6Ha2bt2adu3amcrLfoxH1maCQyM/Rc1KxWHbv/F+bwuKW4yXzHnzZTx+Taz8wUGC69lIB3fiFfZXtP/7J8q0XyqXqYC8TJQxW2kd+lfKzp6AHw7RXFf6+7FQSsBvyXOLfQJQrEXa8zkt7wtA8e03uFzX3nRMb76/KoOsNz/euqLf4zXo4FISRF1GU1YKSgADpF9GHfUvvCM2WztSZgzaPDL3bMMpPxNUCjDqIPUXHP/3T1q98X+12tbNdMk/weFduBQVVMatL4WUczh/9Qke85dZ30BN48/NJnPvFzjnZ1XGbyiHqyloPvsXXsvfr3YdS6dYjfIbnr1uqa1DqLGKa6nof02+ZR39rylVyozX8yj8/JPKlu8PDFnXKD4YQ/GBb0BXekfxGXOyuL5xHcbr2jvaDgBlVWOpSE+lIiujVpsp3rcDQ3bVoW4VaZcx6spuO7yinduQi6qOGS6/+PNtb7Pa/ezejiE3q0q5PvUSci2fBGmUCahQNtoRclVJisru2y3rVH+YFQ4qUFhYpnK4ZataK/V5PBUSitpeQLMUj0JxZ5/ZQhy3M5rqVhQqBwsLFOYdixpolAno9fJbtg6hxlRt2uPgd9ct66jv6ValTGrpRfNxT6Jq37HKMqVPO5yDQmkWNhqFs8sdxSe18sF9xotI7p53tB0AnJpVKXJo1x6lR/XnN5a4PPpXlN5VB2s7dLgbhYO6mjVqxnXkRCS3luaFCgXqLvff9jar0yx0NMpWPlXK1f6dat14NMoEdLyrM6ruvWwdRjUUlX/lJAmcnFDf0w2PF1dY/QvrPusVNA8OQHL3AEcNKt8OuE1+DlUrHzzmL8fxvt5IzVugcHJC5XcX7jNeQnJuhlPvfriODEfp0w5UqsqWQ6lE4eQM1r6oCgWST1taPjMfx3u60vxvz6Jq1wHUjuYtkEIBakcUrm6Vn0uSQKVG8vBC6ekNjhqkFh5oevfDc/HbOPh3QuHUDIWrG+quPWj54spaH0XJtTnu0+ehat8RhcYZyc0dxx4P4DF3aa23dTO13100D38aVVu/yrhbeuHUNxj3Z+ff0Xb/SOnWArdpc36L36ky/p6BtJz7eq23ZdORMKmpqQwaNIj9+/ebXYS5wVBSQvYb89H/8nPlF6NAW7sdqBwruwT6367uubVE6X83FBXj2LkrDu38qSgtRp9yHkc/f5yDhyEXXqciOwOnwIHoUhLRnT0BKhVO/QYhSfzW
x1egUEqo2vrVqntjyM3GoM3Fof1dKBzMuzEVmdcwlhTj0L5jlS6dsbSEirQrSF7eyNo8UDmg8mlH+aULGLV5IBsxFhWivjcAdGVUZGUiNXdD3eFus23J+nL0Vy6ibOGBLMvofjqD+q5OOLTrQEV2BsaCyiuesq6sslWXZfSXf0Fya4HK07tyG0Yj+isXkRw1qHyq/s5qQzYYKrfv4oqqmhbltrdbrkN/9VeU7p4oW9ZBy29pP4YK9Jd+QXJ1Q9WqdbV1rH3HG3UCCsKfnbXveKPsggpCUyESUBBsSCSgINhQjRIwJyfH9P/jx4/f8uFYQRBqzupNi3Xr1nH16lUiIiJYv349O3bswNPTkyNHjvDiiy82RIyCYLestoAxMTGsWrUKo9HIp59+yr///W+2bNnCgQMHGiI+QbBrVltAtVqNo6Mj8fHxeHl54efnB9T98B5BaIqsJqCnpycffPABR44cYcSIEQAcO3asyhTogiDUntUu6Ntvv01xcTEhISFMmzYNgN27d7NixYp6D04Q7J3VFjAmJoaXXnrJrGz58uWsXr2aLl261FtggtAUWEzA5ORkkpKS2LRpE56entw8Yq2goICoqCgWLlzYIEEKgr2ymIBlZWXEx8dTUFDAZ599ZrbMwcFB3IIQhDpgMQHvv/9+7r//frp06VLlfQRAjV/eIQiCZVYvwkRHR5vewnpz2ahRo+otKEFoKqwm4NixYwkPD+fChQvk5eUxa9YsNm7cyEcffVTjnZSVlTFo0CDTe8UFQahk9SrouHHj8Pf3Z+bMmZSUlDBmzBjWrVuHg4OFeTGqsWHDBovvdRCEpsxiC3jq1CnTP0mSmD17NkajEX9/f86dO8epU6dqtINffvmFCxcuEBQUVFcxC4LdsNgCLliwoEqZRqPh/fcr5z1UKBTs37/f6g5Wr17NkiVLiI6Ovv0oBcFOWUzAuhhsHR0dTUBAAL6+tZ85WRCaAosJKMsyW7du5ejRozg4ODBkyBAGDx5cq40fOnSIq1evcujQITIyMlCr1bRu3Zp+/frdceCCYA8sJuC6des4fvw4I0eOpKKigg8++IDs7OxavUxl3bp1pv+vX7+etm3biuQThJtYTMB9+/axbds201MPw4YNY/r06eJtRoJQhywmoNFoNHvkyMPDg9LS239PwfPPP3/b6wqCvbJ4G0KqZo5+8RCuINQtiy1gfn4+//jHP25ZNmPGjPqLTBCaAIsJGBQUxOXLl62WCYJw+ywm4JtvvtmQcQhCkyQm5hUEGxIJKAg2JBJQEGzI4jngiRMnrK78wAMP1GkwgtDUWEzAxYsXA5X3/lJTU9FoNDRv3hytVoter+euu+7iq6++arBABcEeWUzAvXv3ApVXQ3v27EloaChQOUh7586dJCQkNEyEgmDHrJ4Dfvvtt6bkg8oWccSIEXz77bf1GpggNAVWE1ClUvH5559TVFQEQFFREdHR0WJYmiDUAatzwqxevZqlS5fy6quvolAokGWZrl27ihv1glAHrCZgeno6GzduRKPRoNVqcXNzw9HRsSFiEwS7Z7UL+vnnnxMaGsrEiRP55JNPiI+PR6fTNURsgmD3rLaAN55+uHDhAidPniQ6OpqVK1fi6enJxx9/XO8BCoI9q9FIGL1eT2FhIUVFRRQVFSHLMhqNpr5jEwS7Z7UFnDhxInq9nq5duxIQEMCCBQvo2LFjQ8QmCHbPagt4zz33AHDx4kV+/fVXLl++jFarre+4BKFJsNoCLlu2DACtVsvJkyf54YcfWL9+PTqdjm+++abeAxQEe2Y1AaHyVWSnT5/m9OnTnDlzhtLSUnr16lXfsQmC3bOagEFBQUiSxIMPPkhgYCBPPPEEbdq0aYjYBMHuWUzA8+fP07VrV/773//Svn17tFqteMORINQxixdh5s+fD0D79u2ByquhgiDULYsJKMvyLX8WBOHOWUzAPz7tIJ5+EIS6J+aEEQQbqvHM2NXNlF2TmbFLS0t5+eWXyc3N
RafTMXPmTIKDg+8gZEGwHzWeGft2Z8U+ePAg3bt3Z/r06aSlpTF16lSRgILwm3qfGXvo0KGm/6enp+Pt7V0n2xUEe1CjkTB1YcKECWRkZFTpxgpCU9ZgF2GioqLYsGEDL774orilIQi/qfcETEhIID09HYCuXbtiMBjIy8ur790Kwp+C1QS8MSLmj8aNG1ejHZw8eZJNmzYBkJOTQ0lJCe7u7rUIURDsl8VzwAMHDnDgwAG+++47lixZYrasoKCAK1eu1GgHEyZMYPHixUycOJGysjJee+21at++KwhNkcUE7NGjB6WlpcTGxla5ctm2bVuefvrpGu1Ao9EQERFxZ1EKgp2ymIAeHh4MGzYMf39/unXrhsFgQKvV4u7uLlowQagjVm9DuLq68tRTT/HDDz8gyzKSJNG/f3+WL18u7ukJwh2y2pQtWbKEhx9+mLi4OBITEzl69Ci9evWqcl4oCELtWU3ArKwsnnrqKVxcXABwc3Pj2WefJS0trd6DEwR7ZzUBlUolV69eNStLTU0V54GCUAcsngOWlJTg7OzMzJkzGT16NA899BDNmzcnPz+f+Ph4VqxY0ZBxCoJdstiMjR07FoCwsDB27NjBwIED8fPzIyQkhB07djB48OAGC1IQ7JXFFvDm8Zo+Pj6mhBQEoe5YTECdTsfp06dvOXBazA0qCHfGYgJmZWWxYMECiwmoUCjYv39/vQUmCE2BxQT09fVl165dDRmLIDQ54l6CINiQxQQMDAxsyDgEoUmymIBLly5twDAEoWkSXVBBsKEGm5SpIcmyjCEvm4qcHAwFeShd3FD7+SM5u5jVMxYWgEKB5OJadRuGCgzZmchGIwq1I0p3DxRK5S33a9DmoXDUIDk51+nnEeyX3SVg6ZkfyH13GXJuZpVlqnu60WrFeoylJeS/uxT9tSsoFApUvv54LFiBskVLAAq+3EJB1CbkokKg8jaM5OlN88efwnVo1QEJZUnnuP6vNVRkZaBwUKPudC8t5y5F0mjq9bMKf352lYCGAi25Ea8j52VXu7wiJZHsVQuhtAR9SuLv6+VkkfvmQlqt/ojSk8e4/vGHoC83W9eYk8n1/3yA2r8Tjl3v/728rJS8d1/HkPb7FB2lWenkSRKeC1fV8ScU7I1dnQMW7dxmMflu0P98Dv2Vi1XKyy9fpCI9laJvtlVJvhvk4kIKv4w0KyuO3Ynh2tUqdcuTE5H1+lpELzRFdpWAxsLr1itVGKBcV7VcX46xpBi54tZJI+vN1zUWaKG60UIGPbLBYD0eoUmzqwRsFjoarJx3Kdu0Q+njW7Xcuw0OHe5GE3CL+59KJU4PBZnv89ERSC09q1Zt1UacAwpW2VUCqv064jJiAigtnNo6OdPy76/iNvk5lK3bVpYpFKh8fGkx7QUUSiWuI8PR9OkPij8cGqUKp36P0OzRv5oVq7xa4zLicSSPVr/VU6Lyuwv351+p408n2COFbMN54i9fvszgwYOJjIykdevWdbZdQ34Ohbuj0f0Yj7GoAJo1wyU4DJfgoSjUjgDIpSWUxh0GSYlT4AAUjuatle7XFEoO7cZYVIDKuw3OfYNw8PW3uE/j9XxKfvgOpZs7mt79rN6yEJqGjIwMJk2axN69e/Hz86uy3KYJePLkSSZNmmSr3QtCg4mMjKRPnz5Vym2agGVlZSQkJODl5YVStBiCHTIYDGRnZ9O9e3c01VwTsGkCCkJTZ1cXYQThz8auRsLcbNWqVZw9exaFQsGiRYu4//7fR68cO3aMd999F6VSycMPP8ysWbMsrpOens5LL72EwWDAy8uLd955B7VaTUxMDJs2bUKSJPr27cvcuXMbRVw3zJs3D7VazVtvvdVojllSUhKLFi0CICQkhJkzZzaKuNauXUtcXByyLBMSEsL06dNrfMzumGyH4uLi5GeeeUaWZVlOSUmRx44da7Y8LCxMvnbtmmwwGOTHH39cTklJsbjOyy+/LMfExMiyLMurV6+WIyMj5ZKSEjk4
OFguLCyUjUajPHbsWDklJcXmcd1w5MgRecyYMfLChQsbzTGTZVkeO3asnJCQIBsMBnnu3LlySUmJzeP6+eef5ccff1yWZVk2GAxyaGionJWVVePjdqfssgt6/PhxQkJCALj77rspKCigqKgIgKtXr+Lm5oaPjw+SJDFw4ECOHz9ucZ24uDgGDRoEwKBBgzh+/DhOTk7s2LEDFxcXFAoFLVq0QKvV2jwugPLycjZs2MBzzz3XqI7ZjXdD3nvvvUiSxLvvvouTk5PN43J1dUWn01FeXo5Op0OSpBrFVVfsMgFzcnLMXgLq4eFBdnblGNHs7GxatmxpWubp6Ul2drbFdUpLS01dOy8vL9N2bkzVn5ycTFpaGj169GgUcf3zn/8kPDzcFF9N1XdsaWlpeHh4sGzZMiZOnMh//vOfRhGXj48PoaGhBAcHExwczIQJE2p97O6EXSag/IcLu7Iso1Aoql0GlTO8WVrnxnrVrXvp0iXmz59PREQEDg4ONo/r0qVLJCQkMGzYMKuxNHRssixz6dIlZs+ezcaNG9m+fTvJyck2j+vq1avs27eP2NhY9u3bR1RUFLm5uVbjqit2eRHG29ubnJwc089ZWVl4enpWuywzMxMvLy9UKlW16zg5OVFWVoZGoyEzM5NWrSqHnGVkZDBr1izefvttunbt2ijiOnToENeuXWP8+PEUFRWRl5fHRx99VKOLCvUdm4eHB/fcc4+pZerduzcXLlygU6dONo3r3Llz9OjRw9Tt7Ny5M8nJyfTt29fqMasLdtkC9u/fnz179gCQmJhIq1atTN2Kdu3aUVRURGpqKhUVFRw8eJD+/ftbXKdfv36m8r179zJgwAAAFi9ezNKlS7n33nsbTVxPPvkkX3/9NVu3buX1118nKCioxlf06js2X19fiouL0Wq1GI1Gzp8/T8eOHW0eV/v27UlISMBoNKLX60lOTsbXt+pg/fpitzfi16xZw8mTJ1EoFLz++uskJibi6urKo48+yokTJ1izZg0AgwcPZtq0adWu06VLF7Kysli4cCE6nY42bdrw5ptvkpqaysiRI80uhz/55JOmE3xbxXVzNzguLo4vv/yyVrch6ju2s2fPsmbNGnQ6HQMGDOD5559vFHG9//77HDt2DFmWCQsL48knn6zxMbtTdpuAgvBnYJddUEH4sxAJKAg2JBJQEGxIJKAg2JBIQEGwIZGADaBz5848+uijhIaGmv7duFz+yCOPcPLkyTrd3/bt2wkICCA0NJSQkBBCQkJYtWoVBQUFdbqf8vJyoqOjgcqb4MOHD6/T7TcFdjkSpjH65JNP6nTeG2sCAgJM4y0LCwt55513mDx5Mlu3bsXR0bFO9pGYmEh0dDQjR47E29ubnTt31sl2mxLRAjYiu3btYvjw4YSGhjJlyhSuXLnCsWPHCA8PN9V5+umnmT9/vunnESNG8NNPP91yu66urixduhSlUmlqsTp37kxGRoapzo2f4+LimDBhAnPmzDHtZ9u2bYSFhTF48GAmTZpEWloaOTk5/P3vf+fMmTNMnDiR1NRUunXrBoDRaGTt2rWm1v7ll1+mpKQEgMmTJ7N582bCw8MZMGAA8+bNu+Vr0O2dSMBG4tq1ayxZsoQPPviA3bt3ExQUxGuvvUavXr1ISUlBr9djMBjIz8/n4sXKmb0LCgrIzs6u0VhUSZIICwsjLi7Oat3ExEQef/xxIiIiyM3NZfny5WzevJm9e/fSvn17PvzwQzw9PZk3bx4BAQF8+umnZuvv2rWLw4cPs337dmJiYigoKDB7+uHAgQNs3ryZPXv28P3333Pq1KnaHSw7IhKwgUyePNnsHPDVV181W3706FECAwNNU9eNGzeOuLg4lEolXbp04fz58yQlJdGxY0datGhBZmYmp06d4sEHH0SSavZrdHFxobCw0Go9jUZjGozs4eFBfHy8qfvcp08frl6tOhX/zQ4dOsTIkSNxdnZGkiRGjx7N0aNHTctDQ0PRaDQ4OzvT
oUMH0tPTaxS/PRLngA3E2jlgfn4+zZs3N/3s6uqKLMtotVoCAwM5ffo0sizTs2dPsrOziY+PJzExkYceeqjGMdx4Js8aNzc30/8NBgPr169n//79GAwGiouL8fe3PD8qQF5entk23NzczB7xufl5O6VSiaEJT+EvWsBGwsPDw+yp+uvXryNJEu7u7gQGBnLmzBni4+Pp1asXPXv25NSpU8THx9f4sRmDwUBsbCz9+/cHKrukN774169bfqdGTEwM+/fvZ8uWLezZs4fZs2db3Zenp6fZZ9FqtaZHiARzIgEbif79+3Py5ElT9y4qKor+/fujUqkICAggKSmJ5ORkOnXqREBAAKdOnSInJ8dqawRQXFzMkiVLcHNzIywsDKh8IjwpKQmAL774wmI3Njc3l7Zt2+Lu7k5+fj4xMTEUFxcDoFKpKCoqqnIRZeDAgezYsYPS0lIqKirYtm0bAwcOvO1jY89EAjYSrVu3ZsWKFcycOZOwsDBOnDjB8uXLAVCr1Xh7e9OuXTskSaJ58+aUl5fTq1cvi9s7c+YMoaGhDB48mLCwMBwdHdm4cSMqVeVZx9y5c1m6dCmPPfYYTk5OFqdhGD58OFqtluDgYObPn8/cuXPJyMjgjTfeoHfv3mRlZTFgwACMRqNpnbCwMB5++GFGjx7N8OHD8fHxYcqUKXV4tOyHeBxJEGxItICCYEMiAQXBhkQCCoINiQQUBBsSCSgINiQSUBBsSCSgINiQSEBBsCGRgIJgQ/8PY7L/u5x5m/QAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 216x144 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%time \n",
    "# if GAN_type == 'CGAN' or GAN_type == 'WCGAN':\n",
    "if USE_KMEANS_FOR_CLASSIFICATION:\n",
    "    algorithms = [ \n",
    "    #     [ 'KMeans', cluster.KMeans, (), {'random_state':0} ],\n",
    "        [ 'KMeans', cluster.KMeans, (), {'n_clusters':1, 'random_state':0} ],\n",
    "    #     [ 'KMeans 3', cluster.KMeans, (), {'n_clusters':3, 'random_state':0} ],\n",
    "    #     [ 'Agglomerative', cluster.AgglomerativeClustering, (), {} ],\n",
    "    #     [ 'Agglomerative', cluster.AgglomerativeClustering, (), {'linkage': 'ward', 'n_clusters': 3} ],\n",
    "    #     [ 'Agg. Ave 3', cluster.AgglomerativeClustering, (), {'linkage': 'average', 'n_clusters': 3} ],\n",
    "    #     [ 'Agg. Complete 3', cluster.AgglomerativeClustering, (), {'linkage': 'complete', 'n_clusters': 3} ],\n",
    "    #     [ 'DBSCAN', cluster.DBSCAN, (), {'eps':0.025} ],\n",
    "    #     [ 'HDBSCAN', hdbscan.HDBSCAN, (), {} ],\n",
    "    #     [ 'HDBSCAN', hdbscan.HDBSCAN, (), {'min_cluster_size':10, 'min_samples':1, } ],\n",
    "    #     [ 'HDBSCAN 2 10', hdbscan.HDBSCAN, (), {'min_cluster_size':2, 'min_samples':10, } ],\n",
    "    #     [ 'HDBSCAN 10 10 ', hdbscan.HDBSCAN, (), {'min_cluster_size':10, 'min_samples':10, } ],\n",
    "    ]\n",
    "\n",
    "    rows = len(algorithms)\n",
    "    columns = 1\n",
    "    fig, ax = plt.subplots(3, 2, figsize=(3, 2),\n",
    "                            constrained_layout=True)\n",
    "\n",
    "    for i, [name, algorithm, args, kwds] in enumerate(algorithms):\n",
    "\n",
    "        labels = algorithm(*args, **kwds).fit_predict(train_no_label)\n",
    "        print(len(labels))\n",
    "        colors = np.clip(labels,-1,9)\n",
    "        colors = [ 'C'+str(i) if i>-1 else 'white' for i in colors ]\n",
    "\n",
    "        plt.subplot(rows,columns,i*columns+1)\n",
    "        plt.scatter(train_no_label[data_cols[0]], train_no_label[data_cols[1]], c=colors)\n",
    "        plt.xlabel(data_cols[0]), plt.ylabel(data_cols[1])\n",
    "        plt.title(name)\n",
    "            \n",
    "\n",
    "#     else:\n",
    "#         labels = train_bots_only['Label'].values.tolist() \n",
    "#         sns.set(style=\"ticks\", color_codes=True) # Remove background and grid\n",
    "\n",
    "#     #     g = sns.scatterplot(data_cols[0],data_cols[1], data=train, hue=labels)\n",
    "\n",
    "#     #     plt.show() \n",
    "\n",
    "\n",
    "#         plt.figure()\n",
    "#         ax = sns.countplot(y=\"Label\", data=train_bots_only) # for Seaborn version 0.7 and more\n",
    "#         for p in ax.patches:\n",
    "#             ax.text(p.get_y() + p.get_width() + 2700 , p.get_y()+p.get_height()-0.1, p.get_width(), ha=\"center\") \n",
    "\n",
    "#         ax.set_ylabel('Botnets')\n",
    "\n",
    "#         plt.savefig('Botnet-Trainset.pdf', dpi=600)\n",
    "#         plt.show()\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "#         plt.figure(figsize=(6, 6))\n",
    "#         ax = sns.countplot(y=\"Label\", data=test_bots_only) # for Seaborn version 0.7 and more\n",
    "#         for p in ax.patches:\n",
    "#             ax.text(p.get_y() + p.get_width() + 6000 , p.get_y()+p.get_height()-0.1, p.get_width(), ha=\"center\") \n",
    "\n",
    "#         ax.set_ylabel('Botnets')\n",
    "\n",
    "#         plt.savefig('Botnet-Testset.pdf', dpi=600)\n",
    "#         plt.show()\n",
    "\n",
    "#     #     g = sns.catplot(x=\"class\", hue=\"who\", col=\"survived\", data=titanic, kind=\"count\", height=4, aspect=.7);\n",
    "\n",
    "\n",
    "#     #     sns.pairplot(data=train, vars=[data_cols[0], data_cols[1]], hue='Label')\n",
    "\n",
    "\n",
    "#     # plt.grid(False)\n",
    "#     # plt.show()\n",
    "#     print(train_no_label.describe())\n",
    "    \n",
    "    botnet_w_classes = train_no_label.copy()\n",
    "    botnet_w_classes['Label'] = labels\n",
    "\n",
    "#     print(botnet_w_classes.describe())\n",
    "    train_data = botnet_w_classes\n",
    "    \n",
    "# else:\n",
    "#     train_data = train_no_label\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 54,
     "status": "ok",
     "timestamp": 1646286481008,
     "user": {
      "displayName": "Rizwan Hamid Randhawa",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiCoBmDlAGvdCQjk-uJ2xxx1pJC0fjRsK2FcWLNdaY=s64",
      "userId": "15615777248917371178"
     },
     "user_tz": 0
    },
    "id": "wmB4aCmMWj2D",
    "outputId": "bb6c85f1-3ae0-4e88-99bd-0b6f26f1f70d",
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       0\n",
       "1       0\n",
       "2       0\n",
       "3       0\n",
       "4       0\n",
       "       ..\n",
       "2555    0\n",
       "2556    0\n",
       "2557    0\n",
       "2558    0\n",
       "2559    0\n",
       "Name: Label, Length: 2560, dtype: int32"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data['Label']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "q168UjfTWj2E"
   },
   "source": [
    "<a id=\"GPU Settings\"><h2>GAN Training</h2></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Result: (2560, 67)\n",
      "log_interval : 10\n",
      "Total Batch Iterations: 1500\n",
      "['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Subflow Fwd Pkts', 'Subflow Fwd Byts', 'Subflow Bwd Pkts', 'Subflow Bwd Byts', 'Init Fwd Win Byts', 'Init Bwd Win Byts', 'Fwd Act Data Pkts', 'Fwd Seg Size Min', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean', 'Idle Std', 'Idle Max', 'Idle Min']\n",
      "CIC-2018_2022-05-09 22:05:54.065181\n",
      "WARNING:tensorflow:From /home/nu/anaconda3/lib/python3.9/site-packages/keras/layers/normalization/batch_normalization.py:532: _colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "['loss']\n",
      "Normal: (390961, 67)\n",
      "Bots: (2560, 67)\n",
      "(1792, 67) (273672, 67)\n",
      "======================================================\n",
      "Starting GAN Training..\n",
      "======================================================\n",
      "['loss']\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/nu/anaconda3/lib/python3.9/site-packages/keras/engine/training_v1.py:2079: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n",
      "  updates=self.state_updates,\n",
      "2022-05-09 22:05:57.701775: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2022-05-09 22:06:00.594541: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5411 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1070 Ti, pci bus id: 0000:0a:00.0, compute capability: 6.1\n",
      "2022-05-09 22:06:00.595321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 6167 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1070 Ti, pci bus id: 0000:0b:00.0, compute capability: 6.1\n",
      "2022-05-09 22:06:00.595935: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 5530 MB memory:  -> device: 2, name: NVIDIA GeForce GTX 1070 Ti, pci bus id: 0000:42:00.0, compute capability: 6.1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?\n",
      "WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/nu/anaconda3/lib/python3.9/site-packages/keras/optimizer_v2/adam.py:105: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.\n",
      "  super(Adam, self).__init__(name, **kwargs)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training for 256 steps ...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/nu/anaconda3/lib/python3.9/site-packages/keras/engine/training_v1.py:2079: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n",
      "  updates=self.state_updates,\n",
      "/home/nu/anaconda3/lib/python3.9/site-packages/keras/engine/training_v1.py:2079: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n",
      "  updates=self.state_updates,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/nu/anaconda3/lib/python3.9/site-packages/keras/engine/training_v1.py:2079: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.\n",
      "  updates=self.state_updates,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 256/256: episode: 1, duration: 12.992s, episode steps: 256, steps per second:  20, episode reward: 56.000, mean reward:  0.219 [ 0.000,  1.000], mean action: 5.266 [0.000, 11.000],  loss: 0.626883, mae: 0.694351, mean_q: 1.223371\n",
      "done, took 12.994 seconds\n",
      "WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?\n",
      "  g_loss: 87.78502\n",
      "Evasions: 56\n",
      "Time left = 1.7 hours\n",
      "Total Time Taken: 0.7 minutes\n",
      "epoch_number: 1 completed\n",
      "======================================================\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.806s, episode steps: 256, steps per second:  44, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.172 [0.000, 11.000],  loss: 0.220386, mae: 0.479570, mean_q: 0.930117\n",
      "done, took 5.809 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.983s, episode steps: 256, steps per second:  43, episode reward:  1.000, mean reward:  0.004 [ 0.000,  1.000], mean action: 5.125 [0.000, 11.000],  loss: 0.140457, mae: 0.370254, mean_q: 0.684127\n",
      "done, took 5.986 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.781s, episode steps: 256, steps per second:  44, episode reward: 43.000, mean reward:  0.168 [ 0.000,  1.000], mean action: 5.090 [0.000, 11.000],  loss: 0.107770, mae: 0.297888, mean_q: 0.551259\n",
      "done, took 5.782 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.315s, episode steps: 256, steps per second:  48, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.074 [0.000, 11.000],  loss: 0.092549, mae: 0.262267, mean_q: 0.496595\n",
      "done, took 5.317 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 6.866s, episode steps: 256, steps per second:  37, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.035 [0.000, 11.000],  loss: 0.075480, mae: 0.236302, mean_q: 0.442611\n",
      "done, took 6.870 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.311s, episode steps: 256, steps per second:  48, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.027 [0.000, 11.000],  loss: 0.069711, mae: 0.221820, mean_q: 0.420638\n",
      "done, took 5.315 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.426s, episode steps: 256, steps per second:  47, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.082 [0.000, 11.000],  loss: 0.059817, mae: 0.209656, mean_q: 0.398673\n",
      "done, took 5.428 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.378s, episode steps: 256, steps per second:  48, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.078 [0.000, 11.000],  loss: 0.051979, mae: 0.197643, mean_q: 0.353304\n",
      "done, took 5.381 seconds\n",
      "  g_loss: 8.834775\n",
      "Evasions: 46\n",
      "Time left = 2.0 hours\n",
      "Total Time Taken: 1.6 minutes\n",
      "epoch_number: 2 completed\n",
      "======================================================\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.684s, episode steps: 256, steps per second:  45, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.125 [0.000, 11.000],  loss: 0.047394, mae: 0.181683, mean_q: 0.295516\n",
      "done, took 5.686 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.450s, episode steps: 256, steps per second:  47, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.156 [0.000, 11.000],  loss: 0.044390, mae: 0.174247, mean_q: 0.278961\n",
      "done, took 5.452 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 5.050s, episode steps: 256, steps per second:  51, episode reward:  1.000, mean reward:  0.004 [ 0.000,  1.000], mean action: 5.188 [0.000, 11.000],  loss: 0.040689, mae: 0.173311, mean_q: 0.273624\n",
      "done, took 5.052 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.885s, episode steps: 256, steps per second:  52, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.191 [0.000, 11.000],  loss: 0.038888, mae: 0.164518, mean_q: 0.258765\n",
      "done, took 4.887 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.955s, episode steps: 256, steps per second:  52, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.172 [0.000, 11.000],  loss: 0.037008, mae: 0.161322, mean_q: 0.248108\n",
      "done, took 4.957 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.534s, episode steps: 256, steps per second:  56, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.199 [0.000, 11.000],  loss: 0.035267, mae: 0.158918, mean_q: 0.245608\n",
      "done, took 4.536 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.819s, episode steps: 256, steps per second:  53, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.215 [0.000, 11.000],  loss: 0.031842, mae: 0.147771, mean_q: 0.233139\n",
      "done, took 4.821 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.699s, episode steps: 256, steps per second:  54, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.219 [0.000, 11.000],  loss: 0.032451, mae: 0.150262, mean_q: 0.237466\n",
      "done, took 4.702 seconds\n",
      "  g_loss: 5.7939305\n",
      "Evasions: 7\n",
      "Time left = 1.98 hours\n",
      "Total Time Taken: 2.4 minutes\n",
      "epoch_number: 3 completed\n",
      "======================================================\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.592s, episode steps: 256, steps per second:  56, episode reward:  3.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 5.234 [0.000, 11.000],  loss: 0.028260, mae: 0.142438, mean_q: 0.217606\n",
      "done, took 4.594 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.754s, episode steps: 256, steps per second:  54, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.250 [0.000, 11.000],  loss: 0.029397, mae: 0.140954, mean_q: 0.220787\n",
      "done, took 4.756 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.378s, episode steps: 256, steps per second:  58, episode reward:  3.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 5.238 [0.000, 11.000],  loss: 0.027110, mae: 0.137980, mean_q: 0.209649\n",
      "done, took 4.380 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.278s, episode steps: 256, steps per second:  60, episode reward:  4.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 5.242 [0.000, 11.000],  loss: 0.026963, mae: 0.135933, mean_q: 0.205030\n",
      "done, took 4.280 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.664s, episode steps: 256, steps per second:  55, episode reward:  7.000, mean reward:  0.027 [ 0.000,  1.000], mean action: 5.262 [0.000, 11.000],  loss: 0.026407, mae: 0.136048, mean_q: 0.207090\n",
      "done, took 4.666 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.489s, episode steps: 256, steps per second:  57, episode reward:  4.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 5.262 [0.000, 11.000],  loss: 0.023792, mae: 0.126006, mean_q: 0.195318\n",
      "done, took 4.491 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.450s, episode steps: 256, steps per second:  58, episode reward:  6.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 5.258 [0.000, 11.000],  loss: 0.025390, mae: 0.127566, mean_q: 0.195405\n",
      "done, took 4.451 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.629s, episode steps: 256, steps per second:  55, episode reward:  5.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 5.254 [0.000, 11.000],  loss: 0.024874, mae: 0.128450, mean_q: 0.195207\n",
      "done, took 4.631 seconds\n",
      "  g_loss: 3.9061902\n",
      "Evasions: 34\n",
      "Time left = 1.93 hours\n",
      "Total Time Taken: 3.2 minutes\n",
      "epoch_number: 4 completed\n",
      "======================================================\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.697s, episode steps: 256, steps per second:  54, episode reward:  8.000, mean reward:  0.031 [ 0.000,  1.000], mean action: 5.273 [0.000, 11.000],  loss: 0.025023, mae: 0.121197, mean_q: 0.189313\n",
      "done, took 4.699 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.335s, episode steps: 256, steps per second:  59, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 5.254 [0.000, 11.000],  loss: 0.025278, mae: 0.121280, mean_q: 0.189727\n",
      "done, took 4.336 seconds\n",
      "Training for 256 steps ...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 256/256: episode: 1, duration: 4.365s, episode steps: 256, steps per second:  59, episode reward:  6.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 5.273 [0.000, 11.000],  loss: 0.022384, mae: 0.119143, mean_q: 0.181734\n",
      "done, took 4.367 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.300s, episode steps: 256, steps per second:  60, episode reward:  9.000, mean reward:  0.035 [ 0.000,  1.000], mean action: 5.285 [0.000, 11.000],  loss: 0.024562, mae: 0.119944, mean_q: 0.180821\n",
      "done, took 4.301 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.186s, episode steps: 256, steps per second:  61, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.281 [0.000, 11.000],  loss: 0.022477, mae: 0.113481, mean_q: 0.172778\n",
      "done, took 4.188 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.313s, episode steps: 256, steps per second:  59, episode reward:  7.000, mean reward:  0.027 [ 0.000,  1.000], mean action: 5.281 [0.000, 11.000],  loss: 0.020476, mae: 0.110860, mean_q: 0.162471\n",
      "done, took 4.314 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.200s, episode steps: 256, steps per second:  61, episode reward:  6.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 5.273 [0.000, 11.000],  loss: 0.021186, mae: 0.111118, mean_q: 0.169068\n",
      "done, took 4.201 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.180s, episode steps: 256, steps per second:  61, episode reward:  3.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 5.254 [0.000, 11.000],  loss: 0.020615, mae: 0.109111, mean_q: 0.166937\n",
      "done, took 4.182 seconds\n",
      "  g_loss: 2.1912558\n",
      "Evasions: 41\n",
      "Time left = 1.87 hours\n",
      "Total Time Taken: 3.9 minutes\n",
      "epoch_number: 5 completed\n",
      "======================================================\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.170s, episode steps: 256, steps per second:  61, episode reward:  5.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 5.285 [0.000, 11.000],  loss: 0.022010, mae: 0.108006, mean_q: 0.169420\n",
      "done, took 4.172 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.153s, episode steps: 256, steps per second:  62, episode reward:  2.000, mean reward:  0.008 [ 0.000,  1.000], mean action: 5.309 [0.000, 11.000],  loss: 0.021312, mae: 0.105417, mean_q: 0.161299\n",
      "done, took 4.154 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.128s, episode steps: 256, steps per second:  62, episode reward:  5.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 5.293 [0.000, 11.000],  loss: 0.018560, mae: 0.104420, mean_q: 0.163102\n",
      "done, took 4.130 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.248s, episode steps: 256, steps per second:  60, episode reward:  5.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 5.277 [0.000, 11.000],  loss: 0.019180, mae: 0.102788, mean_q: 0.164905\n",
      "done, took 4.250 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.180s, episode steps: 256, steps per second:  61, episode reward:  6.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 5.285 [0.000, 11.000],  loss: 0.018722, mae: 0.100055, mean_q: 0.158630\n",
      "done, took 4.183 seconds\n",
      "Training for 256 steps ...\n",
      " 256/256: episode: 1, duration: 4.489s, episode steps: 256, steps per second:  57, episode reward:  4.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 5.281 [0.000, 11.000],  loss: 0.018271, mae: 0.099618, mean_q: 0.151311\n",
      "done, took 4.490 seconds\n",
      "Training for 256 steps ...\n"
     ]
    }
   ],
   "source": [
    "import header\n",
    "import importlib\n",
    "importlib.reload(header) # For reloading after making changes\n",
    "from header import *\n",
    "\n",
    "\n",
    "gpu_device = '/device:GPU:1'\n",
    "physical_devices = tf.config.list_physical_devices('GPU') \n",
    "for gpu_instance in physical_devices: \n",
    "    tf.config.experimental.set_memory_growth(gpu_instance, True)\n",
    "\n",
    "#----------------------------------\n",
    "# Set neurons and batch size\n",
    "#----------------------------------\n",
    "base_n_count = 256\n",
    "batch_size =  256\n",
    "#----------------------------------\n",
    "\n",
    "\n",
    "result = train_data\n",
    "\n",
    "remaining = train_data.shape[0] % batch_size\n",
    "\n",
    "if remaining > 0:\n",
    "    if remaining < train_data.shape[0]:\n",
    "        additional = batch_size - remaining\n",
    "        _additional = train_data.loc[train_data.shape[0]-additional:train_data.shape[0],: ]  \n",
    "        \n",
    "        frames = [train_data, _additional]\n",
    "        result = pd.concat(frames).reset_index(drop=True)\n",
    "\n",
    "print('Result: ' + str(result.shape))\n",
    "\n",
    "# batch_size = 1\n",
    "# ---------------------------------\n",
    "nb_steps = required_epochs * result.shape[0] // batch_size\n",
    "\n",
    "log_interval = result.shape[0] // batch_size # We are setting this as an epoch. This depends on data size.\n",
    "\n",
    "print(\"log_interval : \" + str(log_interval))\n",
    "\n",
    "# nb_steps = TRAINING_ITERATIONS  # 50000 # Add one for logging of the last interval\n",
    "print(\"Total Batch Iterations: \" + str(nb_steps))\n",
    "rand_noise_dim = 100 \n",
    "\n",
    "\n",
    "k_d = 1  # number of critic network updates per adversarial training step\n",
    "k_g = 1  # number of generator network updates per adversarial training step\n",
    "\n",
    "critic_pre_train_steps = 100# 100  # number of steps to pre-train the critic before starting adversarial training\n",
    "\n",
    "generator_model_path, discriminator_model_path, loss_pickle_path = None, None, None\n",
    "\n",
    "show = True \n",
    "train = result#.copy().reset_index(drop=True) # botnet only with labels from classification\n",
    "\n",
    "\n",
    "\n",
    "label_cols = [ i for i in train.columns if 'Label' in i ]\n",
    "\n",
    "data_cols = [ i for i in train.columns if i not in label_cols ]\n",
    "\n",
    "print(data_cols)\n",
    "\n",
    "train_no_label = train[ data_cols ]\n",
    "\n",
    "train_no_label = round(train_no_label, 4)\n",
    "\n",
    "# if SAVE_ONLY_BOT_DATA:\n",
    "#     train_no_label.to_csv(str(DATA_SET_PATH) + 'ONLY_BOTNET_DATA_(Preprocessed).csv')\n",
    "#     print('File: ' + 'ONLY_BOTNET_DATA_(Preprocessed).csv saved to directory')   \n",
    "\n",
    "\n",
    "\n",
    "test_size = train.shape[0] \n",
    "learning_rate = 5e-4\n",
    "\n",
    "\n",
    "TODAY = DATA_SET + '_' + str(datetime.datetime.now()) \n",
    "\n",
    "print(TODAY)\n",
    "\n",
    "\n",
    "arguments = [rand_noise_dim, nb_steps, batch_size, \n",
    "            k_d, k_g, critic_pre_train_steps, log_interval, learning_rate, base_n_count,\n",
    "            CACHE_PATH, FIGS_PATH, show, test_size, gpu_device, EVALUATION_PARAMETER, TODAY, DATA_SET ]\n",
    "\n",
    "best_losses = train_RELEVAGAN_CC(arguments, train, Train, data_cols)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "celltoolbar": "Slideshow",
  "colab": {
   "collapsed_sections": [],
   "name": "RELEVAGAN.ipynb",
   "provenance": []
  },
  "interpreter": {
   "hash": "0754585e5bea998e5d67e8f88be1e2a4051f453a7d5aedf516d053743049d686"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
